library(reticulate)
## Warning: package 'reticulate' was built under R version 3.6.2
# Point reticulate at the Python 3 interpreter (path is for a Mac environment).
use_python("/usr/local/bin/python3")
import pandas as pd
import plotly.express as px
import plotly.offline as py
import plotly.graph_objects as go
# Read the accounts data from CSV.
accounts = pd.read_csv('data/accounts_analytical.csv')
# Drop every row where loan_amount or loan_payment is missing; the remaining
# rows are the ones used for all of the loan plots below.
loan_columns = ["loan_amount", "loan_payment"]
loans = accounts.dropna(subset=loan_columns)
# Overlaid histograms of loan_date, split by default status.
# x0: loan_date values for rows with loan_default == False,
# x1: loan_date values for rows with loan_default == True.
x0 = loans.loc[loans.loan_default == False, 'loan_date'].values
x1 = loans.loc[loans.loan_default == True, 'loan_date'].values
# One semi-transparent trace per group so both stay visible when overlaid.
data1 = go.Histogram(x=x0, name='Not Default', opacity=0.7)
data2 = go.Histogram(x=x1, name='Default', opacity=0.7)
data = [data1, data2]
layout = dict(
    barmode='overlay',
    title="Histogram of loans counts by loan_date",
    xaxis=dict(title='loan_date'),
)
# The figure is passed as a plain dict of traces and layout, so no
# go.Figure() object needs to be created first.
fig = dict(data=data, layout=layout)
# Write the chart to an HTML file without opening a browser.
py.plot(fig, filename="loan_date.html", auto_open=False)
## 'loan_date.html'
# Include the loan_date.html file produced by the py.plot() call above.
htmltools::includeHTML("loan_date.html")
# Overlaid histograms of loan_amount, split by default status.
# x0: loan_amount values for rows with loan_default == False,
# x1: loan_amount values for rows with loan_default == True.
x0 = loans.loc[loans.loan_default == False, 'loan_amount'].values
x1 = loans.loc[loans.loan_default == True, 'loan_amount'].values
# One semi-transparent trace per group so both stay visible when overlaid.
data1 = go.Histogram(x=x0, name='Not Default', opacity=0.7)
data2 = go.Histogram(x=x1, name='Default', opacity=0.7)
data = [data1, data2]
layout = dict(
    barmode='overlay',
    title="Histogram of loans counts by loan_amount",
    xaxis=dict(title='loan_amount'),
)
# The figure is passed as a plain dict of traces and layout, so no
# go.Figure() object needs to be created first.
fig = dict(data=data, layout=layout)
# Write the chart to an HTML file without opening a browser.
py.plot(fig, filename="loan_amount.html", auto_open=False)
## 'loan_amount.html'
# Include the loan_amount.html file produced by the py.plot() call above.
htmltools::includeHTML("loan_amount.html")
# Overlaid histograms of loan_payment, split by default status.
# x0: loan_payment values for rows with loan_default == False,
# x1: loan_payment values for rows with loan_default == True.
x0 = loans.loc[loans.loan_default == False, 'loan_payment'].values
x1 = loans.loc[loans.loan_default == True, 'loan_payment'].values
# One semi-transparent trace per group so both stay visible when overlaid.
data1 = go.Histogram(x=x0, name='Not Default', opacity=0.7)
data2 = go.Histogram(x=x1, name='Default', opacity=0.7)
data = [data1, data2]
layout = dict(
    barmode='overlay',
    title="Histogram of loans counts by loan_payment",
    xaxis=dict(title='loan_payment'),
)
# The figure is passed as a plain dict of traces and layout, so no
# go.Figure() object needs to be created first.
fig = dict(data=data, layout=layout)
# Write the chart to an HTML file without opening a browser.
py.plot(fig, filename="loan_payment.html", auto_open=False)
## 'loan_payment.html'
# Include the loan_payment.html file produced by the py.plot() call above.
htmltools::includeHTML("loan_payment.html")
# Overlaid histograms of loan_term, split by default status.
# x0: loan_term values for rows with loan_default == False,
# x1: loan_term values for rows with loan_default == True.
x0 = loans.loc[loans.loan_default == False, 'loan_term'].values
x1 = loans.loc[loans.loan_default == True, 'loan_term'].values
# One semi-transparent trace per group so both stay visible when overlaid.
data1 = go.Histogram(x=x0, name='Not Default', opacity=0.7)
data2 = go.Histogram(x=x1, name='Default', opacity=0.7)
data = [data1, data2]
layout = dict(
    barmode='overlay',
    title="Histogram of loans counts by loan_term",
    xaxis=dict(title='loan_term'),
)
# The figure is passed as a plain dict of traces and layout, so no
# go.Figure() object needs to be created first.
fig = dict(data=data, layout=layout)
# Write the chart to an HTML file without opening a browser.
py.plot(fig, filename="loan_term.html", auto_open=False)
## 'loan_term.html'
# Include the loan_term.html file produced by the py.plot() call above.
htmltools::includeHTML("loan_term.html")
# Bar chart of loan_status counts, split by default status.
# x0 / x1 hold the per-status counts for non-defaulted / defaulted loans.
x0 = loans.loc[loans.loan_default == False, 'loan_status'].value_counts()
x1 = loans.loc[loans.loan_default == True, 'loan_status'].value_counts()
# One bar trace per group: status labels on x, counts on y.
not_default_bars = go.Bar(name='Not default', x=x0.index, y=x0.values)
default_bars = go.Bar(name='Default', x=x1.index, y=x1.values)
data = [not_default_bars, default_bars]
layout = go.Layout(
    title="Bar Chart of loans counts by loan_status",
    xaxis=dict(title='loan_status'),
)
fig = dict(data=data, layout=layout)
# Write the chart to an HTML file without opening a browser.
py.plot(fig, filename="loan_status.html", auto_open=False)
## 'loan_status.html'
# Include the loan_status.html file produced by the py.plot() call above.
htmltools::includeHTML("loan_status.html")
The loans were issued between 1993 and 1998, and the loans issued between 1996 and 1997 have a higher chance of going into default.
Most loans are under 200K, and loans between 450K and 500K are more likely to go into default.
The loan payment attribute follows a normal distribution.
The loan term does not make much difference in terms of default, but there are no loans paid in 50 to 60 months.
Expired loans have a higher default rate.